package spark.MLlib;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

/**
 * Author: LDL
 * Description: counts the lines in the Spark README that contain the letters "a" and "b".
 * Created: 2015/6/30 16:37
 */
public class SimpleApp {
    public static void main(String[] args) {
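        // Point hadoop.home.dir at a local Hadoop binary distribution so Spark can find
        // bin\winutils.exe when running on Windows; not needed on Linux or macOS.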
        System.setProperty("hadoop.home.dir", "D:\\develop\\tools\\hadoop-common-2.2.0-bin-master");
        String logFile = "D:/develop/tools/spark-1.4.0/README.md"; // Should be some file on your system
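        // "local" runs Spark in-process with a single worker thread, which is enough for this demo.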
        SparkConf conf = new SparkConf().setMaster("local").setAppName("Simple Application");
        JavaSparkContext sc = new JavaSparkContext(conf);
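        // Cache the lines in memory because the RDD is scanned twice (once per count below).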
        JavaRDD<String> logData = sc.textFile(logFile).cache();

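        // Count the lines that contain the letter "a".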
        long numAs = logData.filter(new Function<String, Boolean>() {
            public Boolean call(String s) {
                return s.contains("a");
            }
        }).count();

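        // Count the lines that contain the letter "b".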
        long numBs = logData.filter(new Function<String, Boolean>() {
            public Boolean call(String s) {
                return s.contains("b");
            }
        }).count();

        System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);

        // Release the context's resources before exiting.
        sc.stop();
    }
}
